{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CRIM</th>\n",
       "      <th>ZN</th>\n",
       "      <th>INDUS</th>\n",
       "      <th>CHAS</th>\n",
       "      <th>NOX</th>\n",
       "      <th>RM</th>\n",
       "      <th>AGE</th>\n",
       "      <th>DIS</th>\n",
       "      <th>RAD</th>\n",
       "      <th>TAX</th>\n",
       "      <th>PTRATIO</th>\n",
       "      <th>B</th>\n",
       "      <th>LSTAT</th>\n",
       "      <th>MEDV</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.00632</td>\n",
       "      <td>18</td>\n",
       "      <td>2.31</td>\n",
       "      <td>0</td>\n",
       "      <td>0.538</td>\n",
       "      <td>6.575</td>\n",
       "      <td>65.2</td>\n",
       "      <td>4.0900</td>\n",
       "      <td>1</td>\n",
       "      <td>296</td>\n",
       "      <td>15</td>\n",
       "      <td>396.90</td>\n",
       "      <td>4.98</td>\n",
       "      <td>24.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.02731</td>\n",
       "      <td>0</td>\n",
       "      <td>7.07</td>\n",
       "      <td>0</td>\n",
       "      <td>0.469</td>\n",
       "      <td>6.421</td>\n",
       "      <td>78.9</td>\n",
       "      <td>4.9671</td>\n",
       "      <td>2</td>\n",
       "      <td>242</td>\n",
       "      <td>17</td>\n",
       "      <td>396.90</td>\n",
       "      <td>9.14</td>\n",
       "      <td>21.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.02729</td>\n",
       "      <td>0</td>\n",
       "      <td>7.07</td>\n",
       "      <td>0</td>\n",
       "      <td>0.469</td>\n",
       "      <td>7.185</td>\n",
       "      <td>61.1</td>\n",
       "      <td>4.9671</td>\n",
       "      <td>2</td>\n",
       "      <td>242</td>\n",
       "      <td>17</td>\n",
       "      <td>392.83</td>\n",
       "      <td>4.03</td>\n",
       "      <td>34.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.03237</td>\n",
       "      <td>0</td>\n",
       "      <td>2.18</td>\n",
       "      <td>0</td>\n",
       "      <td>0.458</td>\n",
       "      <td>6.998</td>\n",
       "      <td>45.8</td>\n",
       "      <td>6.0622</td>\n",
       "      <td>3</td>\n",
       "      <td>222</td>\n",
       "      <td>18</td>\n",
       "      <td>394.63</td>\n",
       "      <td>2.94</td>\n",
       "      <td>33.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.06905</td>\n",
       "      <td>0</td>\n",
       "      <td>2.18</td>\n",
       "      <td>0</td>\n",
       "      <td>0.458</td>\n",
       "      <td>7.147</td>\n",
       "      <td>54.2</td>\n",
       "      <td>6.0622</td>\n",
       "      <td>3</td>\n",
       "      <td>222</td>\n",
       "      <td>18</td>\n",
       "      <td>396.90</td>\n",
       "      <td>5.33</td>\n",
       "      <td>36.2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      CRIM  ZN  INDUS  CHAS    NOX     RM   AGE     DIS  RAD  TAX  PTRATIO  \\\n",
       "0  0.00632  18   2.31     0  0.538  6.575  65.2  4.0900    1  296       15   \n",
       "1  0.02731   0   7.07     0  0.469  6.421  78.9  4.9671    2  242       17   \n",
       "2  0.02729   0   7.07     0  0.469  7.185  61.1  4.9671    2  242       17   \n",
       "3  0.03237   0   2.18     0  0.458  6.998  45.8  6.0622    3  222       18   \n",
       "4  0.06905   0   2.18     0  0.458  7.147  54.2  6.0622    3  222       18   \n",
       "\n",
       "        B  LSTAT  MEDV  \n",
       "0  396.90   4.98  24.0  \n",
       "1  396.90   9.14  21.6  \n",
       "2  392.83   4.03  34.7  \n",
       "3  394.63   2.94  33.4  \n",
       "4  396.90   5.33  36.2  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#读取数据\n",
    "df = pd.read_csv(\"./boston_housing.csv\")\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 506 entries, 0 to 505\n",
      "Data columns (total 14 columns):\n",
      "CRIM       506 non-null float64\n",
      "ZN         506 non-null int64\n",
      "INDUS      506 non-null float64\n",
      "CHAS       506 non-null int64\n",
      "NOX        506 non-null float64\n",
      "RM         506 non-null float64\n",
      "AGE        506 non-null float64\n",
      "DIS        506 non-null float64\n",
      "RAD        506 non-null int64\n",
      "TAX        506 non-null int64\n",
      "PTRATIO    506 non-null int64\n",
      "B          506 non-null float64\n",
      "LSTAT      506 non-null float64\n",
      "MEDV       506 non-null float64\n",
      "dtypes: float64(9), int64(5)\n",
      "memory usage: 55.4 KB\n"
     ]
    }
   ],
   "source": [
    "#数据整体信息\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 下面是特征工程"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "#数据去躁\n",
    "#df = df[df.MEDV<50]\n",
    "#print(df.shape)\n",
    "\n",
    "#数据分离，将特征与标签分离\n",
    "y = df[\"MEDV\"]\n",
    "X = df.drop([\"MEDV\"], axis=1)\n",
    "\n",
    "#对标签（价格）y做log变换\n",
    "log_y = np.log1p(y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "离散型特征编码\n",
    "离散特征可以通过独热编码（one-hot encode），将原来有K种取值的离散型特征变成K维0-1编码特征，这K维特征中只有一个是1（独热），其余维均为0.\n",
    "独热编码可以用pandas的get_dummies方法（哑编码）或者Scikit-Learn中的OneHotEncoder类来实现。\n",
    "\n",
    "get_dummies方法要求输入特征的类型是非数值型（\"object\"）；\n",
    "而OneHotEncoder要求输入是整数。如果是字符串要先用LabelEncoder变成整数。（但LabelEncoder输出是一维（1D）数组，而OneHotEncoder要求输出是2D数组，需要在二者之间进行格式转换）。\n",
    "\n",
    "另外如果训练数据和测试数据不能同时获得的话，需要用OneHotEncoder，用训练集训练编码器，然后对训练集和测试集进行编码；\n",
    "而get_dummies是依赖于DataFrame，只适用于一个数据集情况。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 特征RAD指距离高速公路的便利指数。其数值虽是数值型，但实际是索引，可换成离散特征/类别型特征编码试试。\n",
    "X[\"RAD\"].astype(\"object\")\n",
    "X_cat = X[\"RAD\"]\n",
    "X_cat = pd.get_dummies(X_cat, prefix=\"RAD\")#对类别型特征编码。prefix参数表示在值前面加前缀RAD表类别，用于索引\n",
    "\n",
    "#去除原数据矩阵中RAD一列，后续会拼接编码后的数组\n",
    "X = X.drop(\"RAD\", axis=1)\n",
    "\n",
    "feat_names = X.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>RAD_1</th>\n",
       "      <th>RAD_2</th>\n",
       "      <th>RAD_3</th>\n",
       "      <th>RAD_4</th>\n",
       "      <th>RAD_5</th>\n",
       "      <th>RAD_6</th>\n",
       "      <th>RAD_7</th>\n",
       "      <th>RAD_8</th>\n",
       "      <th>RAD_24</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   RAD_1  RAD_2  RAD_3  RAD_4  RAD_5  RAD_6  RAD_7  RAD_8  RAD_24\n",
       "0      1      0      0      0      0      0      0      0       0\n",
       "1      0      1      0      0      0      0      0      0       0\n",
       "2      0      1      0      0      0      0      0      0       0\n",
       "3      0      0      1      0      0      0      0      0       0\n",
       "4      0      0      1      0      0      0      0      0       0"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_cat.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数值型特征预处理\n",
    "在之前数据探索时可知，不同特征的数据差异太大，需要进行标准化预处理。标准化的目的在于避免原始特征值差异过大，导致训练得到的参数权重单位不一致，无法比较各特征的重要性。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "#数据标准化StandardScaler或最大最小化MinMaxScaler\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "#初始化特征和输出值的标准化器\n",
    "ss_X = StandardScaler()\n",
    "ss_y = StandardScaler()\n",
    "ss_log_y = StandardScaler()\n",
    "\n",
    "# 分别对训练和测试数据的特征以及输出值进行标准化处理\n",
    "# 对训练数据，先调用fit方法训练模型，得到模型参数；然后对训练数据和测试数据进行transform\n",
    "X = ss_X.fit_transform(X)\n",
    "\n",
    "#对y做标准化不是必须\n",
    "#对y标准化的好处是不同问题的w差异不太大，同时正则参数的范围也有限\n",
    "y = ss_y.fit_transform(y.values.reshape(-1, 1))\n",
    "log_y = ss_log_y.fit_transform(log_y.values.reshape(-1, 1))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 保存特征工程的结果到文件，供机器学习模型使用"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CRIM</th>\n",
       "      <th>ZN</th>\n",
       "      <th>INDUS</th>\n",
       "      <th>CHAS</th>\n",
       "      <th>NOX</th>\n",
       "      <th>RM</th>\n",
       "      <th>AGE</th>\n",
       "      <th>DIS</th>\n",
       "      <th>TAX</th>\n",
       "      <th>PTRATIO</th>\n",
       "      <th>...</th>\n",
       "      <th>RAD_2</th>\n",
       "      <th>RAD_3</th>\n",
       "      <th>RAD_4</th>\n",
       "      <th>RAD_5</th>\n",
       "      <th>RAD_6</th>\n",
       "      <th>RAD_7</th>\n",
       "      <th>RAD_8</th>\n",
       "      <th>RAD_24</th>\n",
       "      <th>MEDV</th>\n",
       "      <th>Log_MEDV</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-0.419782</td>\n",
       "      <td>0.285654</td>\n",
       "      <td>-1.287909</td>\n",
       "      <td>-0.272599</td>\n",
       "      <td>-0.144217</td>\n",
       "      <td>0.413672</td>\n",
       "      <td>-0.120013</td>\n",
       "      <td>0.140214</td>\n",
       "      <td>-0.666608</td>\n",
       "      <td>-1.353192</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.159686</td>\n",
       "      <td>0.345176</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-0.417339</td>\n",
       "      <td>-0.487292</td>\n",
       "      <td>-0.593381</td>\n",
       "      <td>-0.272599</td>\n",
       "      <td>-0.740262</td>\n",
       "      <td>0.194274</td>\n",
       "      <td>0.367166</td>\n",
       "      <td>0.557160</td>\n",
       "      <td>-0.987329</td>\n",
       "      <td>-0.475352</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>-0.101524</td>\n",
       "      <td>0.084104</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-0.417342</td>\n",
       "      <td>-0.487292</td>\n",
       "      <td>-0.593381</td>\n",
       "      <td>-0.272599</td>\n",
       "      <td>-0.740262</td>\n",
       "      <td>1.282714</td>\n",
       "      <td>-0.265812</td>\n",
       "      <td>0.557160</td>\n",
       "      <td>-0.987329</td>\n",
       "      <td>-0.475352</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.324247</td>\n",
       "      <td>1.266776</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.416750</td>\n",
       "      <td>-0.487292</td>\n",
       "      <td>-1.306878</td>\n",
       "      <td>-0.272599</td>\n",
       "      <td>-0.835284</td>\n",
       "      <td>1.016303</td>\n",
       "      <td>-0.809889</td>\n",
       "      <td>1.077737</td>\n",
       "      <td>-1.106115</td>\n",
       "      <td>-0.036432</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.182758</td>\n",
       "      <td>1.170822</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-0.412482</td>\n",
       "      <td>-0.487292</td>\n",
       "      <td>-1.306878</td>\n",
       "      <td>-0.272599</td>\n",
       "      <td>-0.835284</td>\n",
       "      <td>1.228577</td>\n",
       "      <td>-0.511180</td>\n",
       "      <td>1.077737</td>\n",
       "      <td>-1.106115</td>\n",
       "      <td>-0.036432</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.487503</td>\n",
       "      <td>1.373242</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 23 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       CRIM        ZN     INDUS      CHAS       NOX        RM       AGE  \\\n",
       "0 -0.419782  0.285654 -1.287909 -0.272599 -0.144217  0.413672 -0.120013   \n",
       "1 -0.417339 -0.487292 -0.593381 -0.272599 -0.740262  0.194274  0.367166   \n",
       "2 -0.417342 -0.487292 -0.593381 -0.272599 -0.740262  1.282714 -0.265812   \n",
       "3 -0.416750 -0.487292 -1.306878 -0.272599 -0.835284  1.016303 -0.809889   \n",
       "4 -0.412482 -0.487292 -1.306878 -0.272599 -0.835284  1.228577 -0.511180   \n",
       "\n",
       "        DIS       TAX   PTRATIO  ...  RAD_2  RAD_3  RAD_4  RAD_5  RAD_6  \\\n",
       "0  0.140214 -0.666608 -1.353192  ...      0      0      0      0      0   \n",
       "1  0.557160 -0.987329 -0.475352  ...      1      0      0      0      0   \n",
       "2  0.557160 -0.987329 -0.475352  ...      1      0      0      0      0   \n",
       "3  1.077737 -1.106115 -0.036432  ...      0      1      0      0      0   \n",
       "4  1.077737 -1.106115 -0.036432  ...      0      1      0      0      0   \n",
       "\n",
       "   RAD_7  RAD_8  RAD_24      MEDV  Log_MEDV  \n",
       "0      0      0       0  0.159686  0.345176  \n",
       "1      0      0       0 -0.101524  0.084104  \n",
       "2      0      0       0  1.324247  1.266776  \n",
       "3      0      0       0  1.182758  1.170822  \n",
       "4      0      0       0  1.487503  1.373242  \n",
       "\n",
       "[5 rows x 23 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fe_data = pd.DataFrame(data=X, columns=feat_names)\n",
    "fe_data = pd.concat([fe_data, X_cat], axis=1, ignore_index=False)#拼接数组。参数ignore_index为False表示不忽视索引名\n",
    "\n",
    "#添加标签y和log_y\n",
    "fe_data[\"MEDV\"] = y\n",
    "fe_data[\"Log_MEDV\"] = log_y\n",
    "\n",
    "fe_data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 506 entries, 0 to 505\n",
      "Data columns (total 23 columns):\n",
      "CRIM        506 non-null float64\n",
      "ZN          506 non-null float64\n",
      "INDUS       506 non-null float64\n",
      "CHAS        506 non-null float64\n",
      "NOX         506 non-null float64\n",
      "RM          506 non-null float64\n",
      "AGE         506 non-null float64\n",
      "DIS         506 non-null float64\n",
      "TAX         506 non-null float64\n",
      "PTRATIO     506 non-null float64\n",
      "B           506 non-null float64\n",
      "LSTAT       506 non-null float64\n",
      "RAD_1       506 non-null uint8\n",
      "RAD_2       506 non-null uint8\n",
      "RAD_3       506 non-null uint8\n",
      "RAD_4       506 non-null uint8\n",
      "RAD_5       506 non-null uint8\n",
      "RAD_6       506 non-null uint8\n",
      "RAD_7       506 non-null uint8\n",
      "RAD_8       506 non-null uint8\n",
      "RAD_24      506 non-null uint8\n",
      "MEDV        506 non-null float64\n",
      "Log_MEDV    506 non-null float64\n",
      "dtypes: float64(14), uint8(9)\n",
      "memory usage: 59.9 KB\n"
     ]
    }
   ],
   "source": [
    "fe_data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "#保存结果到文件\n",
    "fe_data.to_csv('FE_boston_housing.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
